EIA API - Data Backfill

The goal of this doc is to execute an initial data pull of the hourly demand for California balancing authority subregion (CISO). This includes the following four independent system operators:

The data backfill process includes the following steps:

Load libraries

Code
import eia_api as api
import eia_etl as etl
import eia_metadata as eia_meta
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as px
Code
raw_json = open("../metadata/series.json")
meta_json = json.load(raw_json)
series = pd.DataFrame(meta_json["series"])
api_path = meta_json["api_path"]
Code
facets_template = {
  "parent" : None,
  "subba" : None
}

start = datetime.datetime(2018, 6, 20, 1)
end = datetime.datetime(2024, 2, 18, 1)

offset = 2250

eia_api_key = os.getenv('EIA_API_KEY')

meta_path = "../metadata/ciso_log_py.csv"
data_path = "../csv/ciso_grid_py.csv"
Code
metadata = api.eia_metadata(api_key = eia_api_key, api_path = api_path)
metadata.meta.keys()
print(metadata.meta["startPeriod"])
print(metadata.meta["endPeriod"])
2018-06-19T05
2024-02-20T08
Code
start = datetime.datetime(2018, 7, 1, 1)
# start = datetime.datetime(2024, 1, 1, 1)
end = datetime.datetime(2024, 2, 18, 1)
for i in series.index:
  facets = facets_template
  facets["parent"] = series.at[i, "parent_id"]
  facets["subba"] = series.at[i, "subba_id"]
  print(facets)
  temp = api.eia_backfile(api_key = eia_api_key, 
        api_path = api_path+ "data", 
        facets = facets, 
        start = start,
        end = end,
        offset = offset) 
  ts_obj = pd.DataFrame(np.arange(start = start, stop = end + datetime.timedelta(hours = 1), step = datetime.timedelta(hours = 1)).astype(datetime.datetime), columns=["index"])
  ts_obj  = ts_obj.merge(temp.data, left_on = "index", right_on = "period", how="left")
  ts_obj.drop("period", axis = 1, inplace= True)
  ts_obj = ts_obj.rename(columns= {"index": "period"})

  meta_temp = eia_meta.create_metadata(data = ts_obj, start = start, end = end, type = "backfile")
  meta_temp["index"] = 1
  meta_df = pd.DataFrame([meta_temp])

  if i == series.index.start:
    data = ts_obj
    meta = meta_df
  else:
    data = data._append(ts_obj)
    meta = meta._append(meta_df)
{'parent': 'CISO', 'subba': 'PGAE'}
{'parent': 'CISO', 'subba': 'SCE'}
{'parent': 'CISO', 'subba': 'SDGE'}
{'parent': 'CISO', 'subba': 'VEA'}
Code
print(meta)

# The initial pull has some missing values
meta["success"] = True
# Save the data
data.to_csv(data_path)
meta["update"] = True
m = eia_meta.append_metadata(meta_path = "../metadata/ciso_log_py.csv", meta = meta, save = True, init = True)
print(m)
   index parent subba                             time               start  \
0      1   CISO  PGAE 2024-02-21 14:25:14.116685+00:00 2018-07-01 01:00:00   
0      1   CISO   SCE 2024-02-21 14:25:27.859687+00:00 2018-07-01 01:00:00   
0      1   CISO  SDGE 2024-02-21 14:25:41.422542+00:00 2018-07-01 01:00:00   
0      1   CISO   VEA 2024-02-21 14:25:54.614824+00:00 2018-07-01 01:00:00   

                  end           start_act             end_act  start_match  \
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00         True   

   end_match  n_obs   na      type  update  success  \
0       True  49393  105  backfile   False    False   
0       True  49393  105  backfile   False    False   
0       True  49393  105  backfile   False    False   
0       True  49393  105  backfile   False    False   

                      comments  
0  Missing values were found;   
0  Missing values were found;   
0  Missing values were found;   
0  Missing values were found;   
   index parent subba                             time               start  \
0      1   CISO  PGAE 2024-02-21 14:25:14.116685+00:00 2018-07-01 01:00:00   
0      1   CISO   SCE 2024-02-21 14:25:27.859687+00:00 2018-07-01 01:00:00   
0      1   CISO  SDGE 2024-02-21 14:25:41.422542+00:00 2018-07-01 01:00:00   
0      1   CISO   VEA 2024-02-21 14:25:54.614824+00:00 2018-07-01 01:00:00   

                  end           start_act             end_act  start_match  \
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00         True   

   end_match  n_obs   na      type  update  success  \
0       True  49393  105  backfile    True     True   
0       True  49393  105  backfile    True     True   
0       True  49393  105  backfile    True     True   
0       True  49393  105  backfile    True     True   

                      comments  
0  Missing values were found;   
0  Missing values were found;   
0  Missing values were found;   
0  Missing values were found;   

Plot the Series

We will use Plotly to visualize the series:

Code
d = data.sort_values(by = ["subba", "period"])

p = px.line(data, x="period", y="value", color="subba")

p.show()